First, we need to associate the ecomorph to the species we have. This information is in the original .csv that Isabel sent us with the information of every sample. Ignasi compiled the information on the ecomorphs from different references, creating the following summary table.
library(knitr)
#our information
MORPH <- read.table("/gata/mar/cophyhopa/results/2022-12-23/eco_sp.txt", col.names = c("ecomorph", "species"))
kable(MORPH)
| ecomorph | species |
|---|---|
| Albock | acrinasus |
| Balchen1 | alpinus |
| Balchen1/Balchen2 | alpinus/steinmanni |
| Balchen2 | brienzii |
| Balchen2/Felchen | brienzii/fatioi |
| Balchen2 | steinmanni |
| Bondelle | confusus |
| Brienzlig | albellus |
| - | duplex |
| Felchen/Brienzlig | fatioi/albellus |
| Felchen | fatioi |
| - | heglingus |
| Kropfer | profundus |
| - | zuerichensis |
| L | LAN_L |
| P | LAN_P |
| D | LAN_D |
| L | SUO_L |
| P | SUO_P |
#ignasi table
ECOM <- read.table("/gata/mar/cophyhopa/data/ecomorphs.txt", header = TRUE)
kable(ECOM)
| Lake | Species | Ecomorph |
|---|---|---|
| Brienz-Thun | C.albellus | Albeli |
| Brienz-Thun | C.fatioi | Felchen |
| Brienz | C.brienzii | Felchen |
| Brienz-Thun | C.alpinus | Balchen |
| Thun | C.steinmanni | Balchen |
| Thun | C.profundus | Benthicprofundal |
| Constance | C.arenincolus | Balchen |
| Constance | C.macrophthalmus | Felchen |
| Thun | C.acrinasus | Largepelagic |
| Constance | C.wartmanni | Largepelagic |
| Luzern | C.suspensus | Largepelagic |
| Luzern | C.nobilis | Pelagicprofundal |
| Luzern | C.muelleri | Albeli |
| Luzern | C.litoralis | Balchen |
| Luzern | C.sp.Alpnacherfelchen | Felchen |
| Luzern | C.intermundia | Felchen |
| Walen-Zurich | C.duplex | Balchen |
| Zurich | C.zuerichensis | Felchen |
| Neuchatel | C.candidus | Albeli |
| Biel | C.confusus | Albeli |
| Biel-Neuchatel | C.palaea | Balchen |
| Drewitz | C.holsatus | Balchen |
| Muddus | C.lavaretus | NA |
| Shaal | C.maraenoides | Albeli |
| Langfjordvatn | C.lavaretus_Lang_D | D |
| Langfjordvatn | C.lavaretus_Lang_L | L |
| Langfjordvatn | C.lavaretus_Lang_P | P |
| Suopatjavri | C.lavaretus_Suo_L | L |
| Suopatjavri | C.lavaretus_Suo_P | P |
Using the information from both tables (and further considering the information provided by Ignasi's references), we choose SUO_P and SUO_L (SUO lake), LAN_L and LAN_P (LAN lake) for the lakes in Norway. But for alpine lakes, the thing is that most of them are connected, so species are present in different lakes. We chose the species C.fatioi (Felchen ecomorph) present in Lakes Thun and Brienz, C.steinmani (Balchen ecomorph) present in Lake Thun, C.duplex (Balchen ecomorph) and C.zuerichensis (Felchen ecomorph) present in lakes Walen and Zurich, C.albellus (Albeli ecomorph) present in Lakes Thun and Brienz and, finally, C.confusus (Albeli ecomorph) present in Lake Bienne. So, we would be comparing Thun-Brienz vs. Walen-Zurich and Thun-Brienz v. Bienne for the alpine region.
Then the lake that shares the same ecomorph with another lake is taken into account for the next round of Fst stats to be calculated. To do that, we need to create a txt file with the names of each individual in a lake that shares the same ecomorph.
I am going to take the txt files previously created in the 2022-12-12 folder with the individuals we need: LAN_L, LAN_P, SUO_P, SUO_L, C.fatioi, C.steinmani, C.duplex, C.zuerichensis, C.albellus and C.confusus.
Ecomorph = c("Albeli", "Felchen", "Balchen", "P (Profundal)", "L (Litoral)")
Species = c("C.albellus (Thun-Brienz) / C.confusus (Bienne)", "C.fatioi (Thun-Brienz) / C.zuerichensis (Walen-Zurich)",
"C.steinmani (Thun) / C.duplex (Walen-Zurich)", "P (LAN) / P (SUO)", "L (LAN) / L (SUO)")
SUM <- data.frame(Ecomorph, Species)
kable(SUM)
| Ecomorph | Species |
|---|---|
| Albeli | C.albellus (Thun-Brienz) / C.confusus (Bienne) |
| Felchen | C.fatioi (Thun-Brienz) / C.zuerichensis (Walen-Zurich) |
| Balchen | C.steinmani (Thun) / C.duplex (Walen-Zurich) |
| P (Profundal) | P (LAN) / P (SUO) |
| L (Litoral) | L (LAN) / L (SUO) |
VCF='/gata/mar/cophyhopa/results/2022-04-22/assem2_outfiles/assem2.vcf'
if [ ! -e dirty.recode.vcf ]; then
vcftools --vcf $VCF \
--out dirty \
--remove ~/cophyhopa/results/2022-05-04/TheWorst.txt \
--recode \
--recode-INFO-all
fi
vcftools --vcf dirty.recode.vcf \
--out ./FST/Felchen_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.fatioi.txt \
--weir-fst-pop C.zuerichensis.txt
vcftools --vcf dirty.recode.vcf \
--out ./FST/Felchen_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.fatioi.txt \
--weir-fst-pop C.zuerichensis.txt \
--fst-window-size 250000 \
--fst-window-step 50000
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --weir-fst-pop C.fatioi.txt
## --weir-fst-pop C.zuerichensis.txt
## --keep C.fatioi.txt
## --keep C.zuerichensis.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Felchen_Alpine
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 27 out of 271 Individuals
## Outputting Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.10873
## Weir and Cockerham weighted Fst estimate: 0.16408
## After filtering, kept 25709 out of a possible 879238 Sites
## Run Time = 45.00 seconds
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --fst-window-size 250000
## --fst-window-step 50000
## --weir-fst-pop C.fatioi.txt
## --weir-fst-pop C.zuerichensis.txt
## --keep C.fatioi.txt
## --keep C.zuerichensis.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Felchen_Alpine_w
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 27 out of 271 Individuals
## Outputting Windowed Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.10873
## Weir and Cockerham weighted Fst estimate: 0.16408
## After filtering, kept 25709 out of a possible 879238 Sites
## Run Time = 44.00 seconds
vcftools --vcf dirty.recode.vcf \
--out ./FST/Balchen_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.steinmanni.txt \
--weir-fst-pop C.duplex.txt
vcftools --vcf dirty.recode.vcf \
--out ./FST/Balchen_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.steinmanni.txt \
--weir-fst-pop C.duplex.txt \
--fst-window-size 250000 \
--fst-window-step 50000
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --weir-fst-pop C.steinmanni.txt
## --weir-fst-pop C.duplex.txt
## --keep C.steinmanni.txt
## --keep C.duplex.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Balchen_Alpine
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 25 out of 271 Individuals
## Outputting Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.092176
## Weir and Cockerham weighted Fst estimate: 0.13099
## After filtering, kept 20729 out of a possible 879238 Sites
## Run Time = 44.00 seconds
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --fst-window-size 250000
## --fst-window-step 50000
## --weir-fst-pop C.steinmanni.txt
## --weir-fst-pop C.duplex.txt
## --keep C.steinmanni.txt
## --keep C.duplex.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Balchen_Alpine_w
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 25 out of 271 Individuals
## Outputting Windowed Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.092176
## Weir and Cockerham weighted Fst estimate: 0.13099
## After filtering, kept 20729 out of a possible 879238 Sites
## Run Time = 43.00 seconds
vcftools --vcf dirty.recode.vcf \
--out ./FST/Albeli_Alpine \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.albellus.txt \
--weir-fst-pop C.confusus.txt
vcftools --vcf dirty.recode.vcf \
--out ./FST/Albeli_Alpine_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop C.albellus.txt \
--weir-fst-pop C.confusus.txt \
--fst-window-size 250000 \
--fst-window-step 50000
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --weir-fst-pop C.albellus.txt
## --weir-fst-pop C.confusus.txt
## --keep C.albellus.txt
## --keep C.confusus.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Albeli_Alpine
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 42 out of 271 Individuals
## Outputting Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.094689
## Weir and Cockerham weighted Fst estimate: 0.14007
## After filtering, kept 13208 out of a possible 879238 Sites
## Run Time = 43.00 seconds
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --fst-window-size 250000
## --fst-window-step 50000
## --weir-fst-pop C.albellus.txt
## --weir-fst-pop C.confusus.txt
## --keep C.albellus.txt
## --keep C.confusus.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/Albeli_Alpine_w
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 42 out of 271 Individuals
## Outputting Windowed Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.094689
## Weir and Cockerham weighted Fst estimate: 0.14007
## After filtering, kept 13208 out of a possible 879238 Sites
## Run Time = 43.00 seconds
vcftools --vcf dirty.recode.vcf \
--out ./FST/P_Arctic \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_P.txt \
--weir-fst-pop SUO_P.txt
vcftools --vcf dirty.recode.vcf \
--out ./FST/P_Arctic_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_P.txt \
--weir-fst-pop SUO_P.txt \
--fst-window-size 250000 \
--fst-window-step 50000
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --weir-fst-pop LAN_P.txt
## --weir-fst-pop SUO_P.txt
## --keep LAN_P.txt
## --keep SUO_P.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/P_Arctic
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 25 out of 271 Individuals
## Outputting Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.1815
## Weir and Cockerham weighted Fst estimate: 0.26221
## After filtering, kept 25628 out of a possible 879238 Sites
## Run Time = 43.00 seconds
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --fst-window-size 250000
## --fst-window-step 50000
## --weir-fst-pop LAN_P.txt
## --weir-fst-pop SUO_P.txt
## --keep LAN_P.txt
## --keep SUO_P.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/P_Arctic_w
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 25 out of 271 Individuals
## Outputting Windowed Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.1815
## Weir and Cockerham weighted Fst estimate: 0.26221
## After filtering, kept 25628 out of a possible 879238 Sites
## Run Time = 44.00 seconds
vcftools --vcf dirty.recode.vcf \
--out ./FST/L_Arctic \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_L.txt \
--weir-fst-pop SUO_L.txt
vcftools --vcf dirty.recode.vcf \
--out ./FST/L_Arctic_w \
--thin 500 \
--remove-indels \
--mac 2 \
--max-alleles 2 \
--max-meanDP 1000 \
--max-missing 1 \
--weir-fst-pop LAN_L.txt \
--weir-fst-pop SUO_L.txt \
--fst-window-size 250000 \
--fst-window-step 50000
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --weir-fst-pop LAN_L.txt
## --weir-fst-pop SUO_L.txt
## --keep LAN_L.txt
## --keep SUO_L.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/L_Arctic
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 83 out of 271 Individuals
## Outputting Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.14006
## Weir and Cockerham weighted Fst estimate: 0.22312
## After filtering, kept 18441 out of a possible 879238 Sites
## Run Time = 47.00 seconds
##
## VCFtools - 0.1.17
## (C) Adam Auton and Anthony Marcketta 2009
##
## Parameters as interpreted:
## --vcf dirty.recode.vcf
## --fst-window-size 250000
## --fst-window-step 50000
## --weir-fst-pop LAN_L.txt
## --weir-fst-pop SUO_L.txt
## --keep LAN_L.txt
## --keep SUO_L.txt
## --mac 2
## --max-alleles 2
## --max-meanDP 1e+03
## --thin 500
## --max-missing 1
## --out ./FST/L_Arctic_w
## --remove-indels
##
## Keeping individuals in 'keep' list
## After filtering, kept 83 out of 271 Individuals
## Outputting Windowed Weir and Cockerham Fst estimates.
## Weir and Cockerham mean Fst estimate: 0.14006
## Weir and Cockerham weighted Fst estimate: 0.22312
## After filtering, kept 18441 out of a possible 879238 Sites
## Run Time = 47.00 seconds
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
FELCHEN <- read.table("./FST/Felchen_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = FELCHEN,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = FELCHEN[which(FELCHEN$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
BALCHEN <- read.table("./FST/Balchen_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(BALCHEN$CHROM))
ggplot(data = BALCHEN,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = BALCHEN[which(BALCHEN$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
ALBELI <- read.table("./FST/Albeli_Alpine.weir.fst", header = TRUE)
CHR <- as.character(unique(ALBELI$CHROM))
ggplot(data = ALBELI,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = ALBELI[which(ALBELI$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
PROF <- read.table("./FST/P_Arctic.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = PROF,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = PROF[which(PROF$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)))
}
library(ggplot2)
setwd("~/cophyhopa/results/2022-12-23")
LITO <- read.table("./FST/L_Arctic.weir.fst", header = TRUE)
CHR <- as.character(unique(FELCHEN$CHROM))
ggplot(data = LITO,
mapping = aes(x = CHROM, y = WEIR_AND_COCKERHAM_FST)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
for (i in 1:length(CHR)) {
print(ggplot(data = LITO[which(LITO$CHROM == CHR[i]), ],
mapping = aes(x = POS, y = WEIR_AND_COCKERHAM_FST)) +
geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)))
}